Load libraries
library(STutility)
library(ggplot2)
library(ggpubr)
library(dplyr)
library(magrittr)
library(UpSetR)
Assemble spaceranger output files and merge curated meta data
# Mouse brain
samples <- Sys.glob(paths = "../data/spaceranger_output/smallintestine/*/filtered_feature_bc_matrix.h5")
imgs <- Sys.glob(paths = "../data/spaceranger_output/smallintestine/*/spatial/tissue_hires_image.png")
spotfiles <- Sys.glob(paths = "../data/spaceranger_output/smallintestine/*/spatial/tissue_positions_list.csv")
json <- Sys.glob(paths = "../data/spaceranger_output/smallintestine/*/spatial/scalefactors_json.json")
infoTable <- data.frame(samples, imgs, spotfiles, json)
infoTable <- cbind(infoTable, arrayID = do.call(rbind, strsplit(infoTable$samples, "/"))[, 5])
curated_metadata <- openxlsx::read.xlsx("../data/sheets/curated_RNA_rescue_sample_metadata.xlsx", sheet = 3)
curated_metadata <- setNames(curated_metadata, nm = c("storage_time", "seq_date", "include", "RNA_rescue",
"project", "experimenter", "processer", "comments",
"ID", "paper_id", "RIN", "DV200", "protocol",
"source", "arrayID", "spots_under_tissue",
"genes_detected", "fraction_spots_under_tissue",
"median_genes_per_spot", "median_UMIs_per_spot",
"saturation", "reads_mapped_to_probe_set",
"reads_mapped_confidently_to_probe_set",
"reads_mapped_confidently_to_filtered_probe_set",
"reads_mapped_to_genome",
"reads_mapped_confidently_to_genome",
"number_of_panel_genes"))
infoTable <- merge(infoTable, curated_metadata, by = "arrayID")
Load data into a Seurat object
SI <- InputFromTable(infoTable %>% arrange(seq_date))
## Using spotfiles to remove spots outside of tissue
## Loading ../data/spaceranger_output/smallintestine/V19T26-028_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V19T26-028_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V19T26-028_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V19T26-028_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-048_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-048_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-048_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-048_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-050_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-050_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-050_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10F24-050_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V10S29-108_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../data/spaceranger_output/smallintestine/V11B18-363_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
##
## ------------- Filtering (not including images based filtering) --------------
## Spots removed: 21
## Genes removed: 6589
## Saving capture area ranges to Staffli object
## After filtering the dimensions of the experiment is: [27127 genes, 46278 spots]
Add a new column with the combined “protocol” and “ID”
SI$protocol_array <- paste0(SI$protocol, " : ", SI$arrayID)
ST.FeaturePlot(SI, features = "nFeature_RNA", ncol = 3, label.by = "protocol_array", show.sb = FALSE, pt.size = 1.5)
## Loading required namespace: viridis
Load H&E images
SI <- LoadImages(SI, time.resolve = FALSE, xdim = 1e3)
## Loading images for 14 samples:
## Reading ../data/spaceranger_output/smallintestine/V19T26-028_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 1 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../data/spaceranger_output/smallintestine/V19T26-028_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 2 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../data/spaceranger_output/smallintestine/V19T26-028_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 3 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../data/spaceranger_output/smallintestine/V19T26-028_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 4 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-048_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 5 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-048_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 6 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-048_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 7 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-048_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 8 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-050_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 9 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-050_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 10 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-050_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 11 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10F24-050_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 12 image from 2000x1937 pixels to 1000x968 pixels
## Reading ../data/spaceranger_output/smallintestine/V10S29-108_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 13 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../data/spaceranger_output/smallintestine/V11B18-363_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 14 image from 1979x2000 pixels to 1000x1011 pixels
Apply rigid transformations to obtain a rough alignment of the tissue sections.
# Warp transform
SI <- WarpImages(SI, verbose = TRUE, transforms = list("3" = list(angle = -90), "4" = list(angle = -90),
"5" = list(angle = 160), "6" = list(angle = 160),
"7" = list(angle = 20), "8" = list(angle = 20),
"9" = list(angle = 20), "10" = list(angle = 20),
"11" = list(angle = 20), "12" = list(angle = 20),
"13" = list(angle = -140), "14" = list(angle = 30)))
## Creating dummy masks ...Loading masked image for sample 3 ...
## Warping pixel coordinates for 3 ...
## Warping image for 3 ...
## Warping image mask for 3 ...
## Finished alignment for sample3
##
## Loading masked image for sample 4 ...
## Warping pixel coordinates for 4 ...
## Warping image for 4 ...
## Warping image mask for 4 ...
## Finished alignment for sample4
##
## Loading masked image for sample 5 ...
## Warping pixel coordinates for 5 ...
## Warping image for 5 ...
## Warping image mask for 5 ...
## Finished alignment for sample5
##
## Loading masked image for sample 6 ...
## Warping pixel coordinates for 6 ...
## Warping image for 6 ...
## Warping image mask for 6 ...
## Finished alignment for sample6
##
## Loading masked image for sample 7 ...
## Warping pixel coordinates for 7 ...
## Warping image for 7 ...
## Warping image mask for 7 ...
## Finished alignment for sample7
##
## Loading masked image for sample 8 ...
## Warping pixel coordinates for 8 ...
## Warping image for 8 ...
## Warping image mask for 8 ...
## Finished alignment for sample8
##
## Loading masked image for sample 9 ...
## Warping pixel coordinates for 9 ...
## Warping image for 9 ...
## Warping image mask for 9 ...
## Finished alignment for sample9
##
## Loading masked image for sample 10 ...
## Warping pixel coordinates for 10 ...
## Warping image for 10 ...
## Warping image mask for 10 ...
## Finished alignment for sample10
##
## Loading masked image for sample 11 ...
## Warping pixel coordinates for 11 ...
## Warping image for 11 ...
## Warping image mask for 11 ...
## Finished alignment for sample11
##
## Loading masked image for sample 12 ...
## Warping pixel coordinates for 12 ...
## Warping image for 12 ...
## Warping image mask for 12 ...
## Finished alignment for sample12
##
## Loading masked image for sample 13 ...
## Warping pixel coordinates for 13 ...
## Warping image for 13 ...
## Warping image mask for 13 ...
## Finished alignment for sample13
##
## Loading masked image for sample 14 ...
## Warping pixel coordinates for 14 ...
## Warping image for 14 ...
## Warping image mask for 14 ...
## Finished alignment for sample14
Add a new metadata column with a combined protocol and lung ID label
SI$paper_id <- "SI"
SI$protocol_id <- gsub(pattern = " ", replacement = "_", paste0(SI$protocol, "_ID: ", SI$paper_id))
ST.FeaturePlot(SI, features = "nFeature_RNA", ncol = 3, label.by = "protocol_id")
## Warning: Removed 174 rows containing missing values (geom_point).
Add manual annotations
SI <- ManualAnnotation(SI, type = "raw")
meta.data <- readRDS("../data/R_objects/SI_metadata_selections")
meta.data <- meta.data[colnames(SI), ]
SI@meta.data$labels <- meta.data$labels
SI@meta.data$labels[is.na(SI@meta.data$labels)] <- "background"
submucosa_13 <- rownames(subset(SI@meta.data, arrayID == "V10S29-108_A1" & labels == "background"))
SI@meta.data[submucosa_13, "labels"] <- "submucosa"
Keep only labeled spots.
SI <- SubsetSTData(SI, expression = labels %in% c("mucosa", "submucosa", "muscularis", "TLS", "serosa"))
Load hgenes.tsv containing biotype annotations and add the “mt_protein_coding” and “rb_protein_coding” categories.
ensids <- read.table("../data/genes/hgenes.tsv", header = T)
rownames(ensids) <- ensids$gene_name
ensids$gene_type[grep(pattern = "^MT-", x = ensids$gene_name)] <- "mt_protein_coding"
ensids$gene_type[grep(pattern = "^RPL|^RPS", x = ensids$gene_name)] <- "rb_protein_coding"
First, we add a new column to our meta data with approximate storage
times (time after sample collection). Then we calculate gene attributes:
gene name, gene counts and biotype (gene_type). From this gene attribute
data.frame we can then calculate percentages of each
biotype for each storage time. Note that all biotypes that are not
“IG_C_gene”, “IG_J_gene”, “IG_V_gene”, “TR_C_gene”, “TR_J_gene”,
“TR_V_gene”, “lncRNA”, “protein_coding”, “rb_protein_coding” or
“mt_protein_coding” are pur in a category called “other”.
# Add approximate storage time
SI$time <- SI[[]] %>%
mutate(time = factor(case_when(seq_date == "200323" ~ "~ 1 month",
seq_date == "200923" ~ "~ 6 months",
seq_date %in% c("220404", "220506") ~ "~ 2 years"),
levels = c("~ 1 month", "~ 6 months", "~ 2 years"))) %>% # ifelse(SI$seq_date %in% c("220404", "220506"), "220404-220506", SI$seq_date)
pull(time)
# calculate gene attributes
gene_attr <- do.call(rbind, lapply(levels(SI$time), function(i) {
x <- rowSums(GetAssayData(SI, slot = "counts", assay = "RNA")[, SI$time == i])
data.frame(gene = names(x), count = x, gene_type = ensids[names(x), "gene_type"], time = i)
}))
# Calculate percentages for each storage time
gene_attr <- gene_attr %>% group_by(time, gene_type) %>%
summarize(N = sum(count)) %>%
group_by(time) %>%
mutate(p = N/sum(N)) %>%
mutate(gene_type = factor(ifelse(gene_type %in% c("IG_C_gene", "IG_J_gene", "IG_V_gene",
"TR_C_gene", "TR_J_gene", "TR_V_gene",
"lncRNA", "protein_coding", "rb_protein_coding",
"mt_protein_coding"), gene_type, "other"), levels =
c("protein_coding", "rb_protein_coding",
"mt_protein_coding", "lncRNA", "IG_C_gene", "IG_J_gene", "IG_V_gene",
"TR_C_gene", "TR_J_gene", "TR_V_gene", "other")))
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
gene_attr$time <- factor(gene_attr$time, levels = c("~ 1 month", "~ 6 months", "~ 2 years"))
# Create a color palette
cols <- c("#332288", "#6699CC", "#88CCEE", "#44AA99", "#117733", "#999933", "#DDCC77", "#661100", "#CC6677", "#882255", "#AA4499")
# Create pie charts
p <- ggplot(gene_attr, aes(x = "", y = p, fill = gene_type)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start = 0) +
facet_wrap(~ time, ncol = 2) +
theme(panel.background = element_rect(fill = "white"),
axis.ticks = element_blank(), axis.text.x = element_blank(),
strip.text = element_text(size = 14, colour = "black"),
legend.text = element_text(size = 14, colour = "black"),
legend.position = c(0.8, 0.3)) +
labs(x = "", y = "", fill = "") +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = cols)
p
pdf(file = "plots/biotype_content.pdf", width = 6, height = 6)
print(p)
dev.off()
We can plot the labels for the five regions as a spatial map.
ST.FeaturePlot(SI, features = "labels", ncol = 4, label.by = "time", show.sb = FALSE)
# Create H&E image plots
plots_HE <- lapply(paste0(1:14), function(i) {
im <- GetStaffli(SI)@rasterlists$processed[[i]]
g <- grid::rasterGrob(im, width = unit(1, "npc"), height = unit(1, "npc"), interpolate = TRUE)
ggplot() +
annotation_custom(g, -Inf, Inf, -Inf, Inf) +
theme_void() +
theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 0))
})
# Plot labels
labels_plot <- lapply(1:14, function(i) {
p <- ST.FeaturePlot(SI, features = "labels", indices = i, show.sb = FALSE, pt.size = 0.7,
cols = c("mucosa" = "#AA4499", "submucosa" = "#DDCC77", "muscularis" = "#CC6677", "TLS" = "#77AADD", "serosa" = "#771155")) &
theme(plot.title = element_blank(), legend.position = "none", strip.text = element_blank())
p <- ggrastr::rasterize(p, layers = "Point", dpi = 300)
})
# Combine H&E with label plots for each tissue section
plots <- lapply(1:14, function(i) {
p <- plots_HE[[i]] | labels_plot[[i]]
return(p)
})
# Patch together tissue sections
p1 <- cowplot::plot_grid(plotlist = plots[c(1:4, 13:14)], ncol = 1)
## Warning: Removed 2 rows containing missing values (geom_point).
p2 <- cowplot::plot_grid(plotlist = c(plots[5:8], list(NULL, NULL)), ncol = 1)
## Warning: Removed 77 rows containing missing values (geom_point).
## Warning: Removed 95 rows containing missing values (geom_point).
p3 <- cowplot::plot_grid(plotlist = c(plots[9:12], list(NULL, NULL)), ncol = 1)
# Create final patchwork
p <- p1 + p2 + p3 + patchwork::plot_layout(ncol = 3)
p